{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from gensim.test.utils import datapath, get_tmpfile\n",
    "from gensim.models import KeyedVectors\n",
    "from gensim.scripts.glove2word2vec import glove2word2vec\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "folder_dir = os.path.join(os.getcwd(), 'glove.6B')\n",
    "glove_file = datapath(folder_dir + '/glove.6B.50d.txt')\n",
    "tmp_file = get_tmpfile(os.path.join(os.getcwd(), 'glove_weights.txt'))\n",
    "_ = glove2word2vec(glove_file, tmp_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-2-0d448ef494bf>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mKeyedVectors\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload_word2vec_format\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mget_tmpfile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetcwd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'glove_weights.txt'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\gensim\\models\\keyedvectors.py\u001b[0m in \u001b[0;36mload_word2vec_format\u001b[1;34m(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype)\u001b[0m\n\u001b[0;32m   1474\u001b[0m         return _load_word2vec_format(\n\u001b[0;32m   1475\u001b[0m             \u001b[0mcls\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfvocab\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mfvocab\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbinary\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbinary\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0municode_errors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0municode_errors\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1476\u001b[1;33m             limit=limit, datatype=datatype)\n\u001b[0m\u001b[0;32m   1477\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1478\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mget_keras_embedding\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_embeddings\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\gensim\\models\\utils_any2vec.py\u001b[0m in \u001b[0;36m_load_word2vec_format\u001b[1;34m(cls, fname, fvocab, binary, encoding, unicode_errors, limit, datatype)\u001b[0m\n\u001b[0;32m    387\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    388\u001b[0m             \u001b[1;32mfor\u001b[0m \u001b[0mline_no\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvocab_size\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 389\u001b[1;33m                 \u001b[0mline\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfin\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    390\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[0mline\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34mb''\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    391\u001b[0m                     \u001b[1;32mraise\u001b[0m \u001b[0mEOFError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"unexpected end of input; is count incorrect or file otherwise damaged?\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "model = KeyedVectors.load_word2vec_format(get_tmpfile(os.path.join(os.getcwd(), 'glove_weights.txt')))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "\n",
    "weights = torch.FloatTensor(model.vectors)\n",
    "embedding = nn.Embedding.from_pretrained(weights)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch.nn as nn\n",
    "\n",
    "class SentimentNet(nn.Module):\n",
    "    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.5):\n",
    "        super(SentimentNet, self).__init__()\n",
    "        self.output_size = output_size\n",
    "        self.n_layers = n_layers\n",
    "        self.hidden_dim = hidden_dim\n",
    "        \n",
    "        self.embedding = nn.Embedding.from_pretrained(weights)\n",
    "        self.embedding.requires_grad = False\n",
    "        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers, dropout=drop_prob, batch_first=True, bidirectional=True)\n",
    "        self.dropout = nn.Dropout(0.2)\n",
    "        self.fc = nn.Linear(hidden_dim, output_size)\n",
    "        self.sigmoid = nn.Sigmoid()\n",
    "        \n",
    "    def forward(self, x, hidden):\n",
    "        batch_size = x.size(0)\n",
    "        x = x.long()\n",
    "        embeds = self.embedding(x)\n",
    "        lstm_out, hidden = self.lstm(embeds, hidden)\n",
    "        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)\n",
    "        \n",
    "        out = self.dropout(lstm_out)\n",
    "        out = self.fc(out)\n",
    "        out = self.sigmoid(out)\n",
    "        \n",
    "        out = out.view(batch_size, -1)\n",
    "        out = out[:,-1]\n",
    "        return out, hidden\n",
    "    \n",
    "    def init_hidden(self, batch_size):\n",
    "        weight = next(self.parameters()).data\n",
    "        hidden = (weight.new(self.n_layers*2, batch_size, self.hidden_dim).zero_().to(device),\n",
    "                      weight.new(self.n_layers*2, batch_size, self.hidden_dim).zero_().to(device))\n",
    "        return hidden"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False\n",
    "is_cuda = torch.cuda.is_available()\n",
    "\n",
    "# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.\n",
    "if is_cuda:\n",
    "    device = torch.device(\"cuda\")\n",
    "    print(\"GPU is available\")\n",
    "else:\n",
    "    device = torch.device(\"cpu\")\n",
    "    print(\"GPU not available, CPU used\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vocab_size = 1\n",
    "output_size = 1\n",
    "embedding_dim = 400\n",
    "hidden_dim = 512\n",
    "n_layers = 2\n",
    "\n",
    "model = SentimentNet(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)\n",
    "model.to(device)\n",
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "embedding(torch.LongTensor([1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'SentimentNet' object has no attribute 'wv'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-10-aa19759b2b96>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mword_vectors\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwv\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\torch\\nn\\modules\\module.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m    533\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0mmodules\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    534\u001b[0m         raise AttributeError(\"'{}' object has no attribute '{}'\".format(\n\u001b[1;32m--> 535\u001b[1;33m             type(self).__name__, name))\n\u001b[0m\u001b[0;32m    536\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    537\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mAttributeError\u001b[0m: 'SentimentNet' object has no attribute 'wv'"
     ]
    }
   ],
   "source": [
    "word_vectors = model.wv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_vectors['computer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
